knitr::opts_chunk$set(echo = TRUE)

mRNA overview

We genrated a list of summary statistics for the mRNA reads.

nohup zsh dmel_mRNA_overview.sh > ../logs/dmel_mRNA_overview.log
#!/bin/bash

pyscript="/Volumes/Data/Projects/dmelR2_p-ele/scripts/mRNA-overview.py"
input_dir="/Volumes/Data/Projects/dmelR2_p-ele/rna/run2/map-GSNAP/output"
output_dir="/Volumes/Data/Projects/dmelR2_p-ele/rna/run2/mRNA/overview"

samtools view $input_dir/gt_R1G6.sort.bam | python $pyscript --sam - --sample-id R1-G6-wf   > $output_dir/wf_R1G6.txt
samtools view $input_dir/gt_R2G6.sort.bam | python $pyscript --sam - --sample-id R2-G6-wf   > $output_dir/wf_R2G6.txt
samtools view $input_dir/gt_R3G6.sort.bam | python $pyscript --sam - --sample-id R3-G6-wf   > $output_dir/wf_R3G6.txt

samtools view $input_dir/gt_R1G15.sort.bam | python $pyscript --sam - --sample-id R1-G15-wf   > $output_dir/wf_R1G15.txt
samtools view $input_dir/gt_R2G15.sort.bam | python $pyscript --sam - --sample-id R2-G15-wf   > $output_dir/wf_R2G15.txt
samtools view $input_dir/gt_R3G15.sort.bam | python $pyscript --sam - --sample-id R3-G15-wf   > $output_dir/wf_R3G15.txt

samtools view $input_dir/gt_R1G21.sort.bam | python $pyscript --sam - --sample-id R1-G21-wf   > $output_dir/wf_R1G21.txt
samtools view $input_dir/gt_R2G21.sort.bam | python $pyscript --sam - --sample-id R2-G21-wf   > $output_dir/wf_R2G21.txt
samtools view $input_dir/gt_R3G21.sort.bam | python $pyscript --sam - --sample-id R3-G21-wf   > $output_dir/wf_R3G21.txt

samtools view $input_dir/gt_R1G30.sort.bam | python $pyscript --sam - --sample-id R1-G30-wf   > $output_dir/wf_R1G30.txt
samtools view $input_dir/gt_R2G30.sort.bam | python $pyscript --sam - --sample-id R2-G30-wf   > $output_dir/wf_R2G30.txt
samtools view $input_dir/gt_R3G30.sort.bam | python $pyscript --sam - --sample-id R3-G30-wf   > $output_dir/wf_R3G30.txt

samtools view $input_dir/gt_R1G40.sort.bam | python $pyscript --sam - --sample-id R1-G40-wf   > $output_dir/wf_R1G40.txt
samtools view $input_dir/gt_R2G40.sort.bam | python $pyscript --sam - --sample-id R2-G40-wf   > $output_dir/wf_R2G40.txt
samtools view $input_dir/gt_R3G40.sort.bam | python $pyscript --sam - --sample-id R3-G40-wf   > $output_dir/wf_R3G40.txt

# Combine outputs into single file, separating ID.
# cat *.txt|perl -pe 's/-/\t/g'
# cat *.txt|perl -pe 's/-/\t/'|perl -pe 's/-/\t/' > dmel_all.forr 

Then visualised the most relevant ones in ggplot2.

library(ggplot2)
library(scales)
library(dplyr)

overview <- read.table("/Volumes/Data/Projects/dmelR2_p-ele/rna/run2/mRNA/overview_genome/dmel_all.forr", 
                       header = FALSE, sep = "\t")
colnames(overview) <- c("Replicate", "Generation", "Tissue", "Reads", "MappedReads", "MappedReadsWithMinMQ", 
                        "SenseGene", "AntisenseGene", "SenseTranscriptExon", "AntisenseTranscriptExon", 
                        "SensePelement", "AntisensePelement")

generation_order <- c("G6", "G15", "G21", "G30", "G40")

overview$Generation <- factor(overview$Generation, levels = generation_order)

overview$AntisenseGene <- -overview$AntisenseGene
overview$AntisensePelement <- -overview$AntisensePelement

mapped_reads_plot <- ggplot(overview, aes(x = Generation)) +
  geom_bar(aes(y = MappedReads, fill = "Mapped reads"), stat = "identity", position = "dodge") +
  geom_bar(aes(y = MappedReadsWithMinMQ, fill = "Mapped reads w/ min mq"), stat = "identity", position = "dodge") +
  facet_wrap(. ~ Replicate, ncol = 1) +
  labs(x = "Generation", y = "Read counts") +
  ggtitle("Mapped reads & mapped reads w/ min mq") +
  scale_fill_manual(values = c("Mapped reads" = "darkseagreen3", "Mapped reads w/ min mq" = "khaki3"),
                    name = NULL,
                    breaks = c("Mapped reads", "Mapped reads w/ min mq"),
                    labels = c("Mapped reads", "Mapped reads w/ min mq"),
                    guide = guide_legend(reverse = TRUE)) +
                    theme_bw() +
                    theme(legend.position = "bottom") +
                    scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M"))

sense_antisense_pelement_plot <- ggplot(overview, aes(x = Generation, y = SensePelement, fill = "Sense")) +
  geom_bar(stat = "identity") +
  geom_bar(aes(y = AntisensePelement, fill = "Antisense"), stat = "identity") +
  facet_wrap(. ~ Replicate, ncol = 1) +
  labs(x = "Generation", y = NULL) +
  ggtitle("Sense/antisense P-element read counts") +
  scale_fill_manual(values = c("Sense" = "lightblue", "Antisense" = "darksalmon"),
                    name = NULL,
                    breaks = c("Sense", "Antisense"),
                    labels = c("Sense", "Antisense"),
                    guide = guide_legend(reverse = TRUE)) +
                    theme_bw() +
                    theme(legend.position = "bottom") +
                    scale_y_continuous(labels = label_number(scale = 1e-3, suffix = "k"))

combined_plot <- cowplot::plot_grid(mapped_reads_plot, sense_antisense_pelement_plot, ncol = 3)

ggsave("figs/mRNA_sum_stats.png", combined_plot, width = 20, height = 10, dpi = 600)

knitr::include_graphics("figs/mRNA_sum_stats.png")

We then looked at the total expression levels of selected genes across all generations for each of the three replicates.

library(ggplot2)
theme_set(theme_bw())

genes<-c("PPI251","FBtr0083183_mRNA","FBtr0088034_mRNA","FBtr0086904_mRNA","FBtr0087984_mRNA","FBtr0087189_mRNA","FBtr0080497_mRNA","FBtr0079489_mRNA","FBtr0445185_mRNA","FBtr0080316_mRNA","FBtr0075559_mRNA","FBtr0100641_mRNA","FBtr0080165_mRNA","FBtr0081502_mRNA","FBtr0073637_mRNA","FBtr0080166_mRNA","FBtr0301669_mRNA","FBtr0086897_mRNA","FBtr0085594_mRNA","FBtr0329922_mRNA","FBtr0081328_mRNA")
sc<-c("p-element","spnE","cuff","dcr-2","hen1","krimp","loqs","r2d2","vas",
      "zuc","ago2-RB","armi-RB","aub","del","moon","piwi","tj-RB","rhino","rpl32","qin-RB","lok")

h<-read.table("/Volumes/Data/Projects/dmelR2_p-ele/rna/run2/mRNA/overview_genome/dmel_all.forr")        

names(h)<-c("rep","time","tissue","strand","gene","pos","cov")

for(i in 1:length(genes))
{
  target<-genes[i]

  a<-subset(h,gene==target)
  a$time <- factor(a$time, levels=c("G6", "G15", "G21", "G30","G40"))
  
  s<-subset(a,strand=="se")
  as<-subset(a,strand=="ase")
}
  
mRNA_plot <- ggplot() +
    geom_polygon(data=s,mapping=aes(x=pos, y=cov), fill='lightblue', color='lightblue') +
    geom_polygon(data=as, aes(x=pos, y=-cov), fill='darksalmon', color='darksalmon')+
    facet_grid(time~rep)+
    ylab("coverage")+
    xlab("position")

ggsave("figs/mRNA_overview.png", mRNA_plot, width = 18, height = 6, dpi = 800)

knitr::include_graphics("figs/mRNA_overview.png")

Then we did the same again but this time looking at the expression levels of each of the selected genes individually, still across generations for each replicate.

library(ggplot2)
library(grid)
theme_set(theme_bw())

genes <- c("PPI251", "FBtr0083183_mRNA", "FBtr0088034_mRNA", "FBtr0086904_mRNA", "FBtr0087984_mRNA", "FBtr0087189_mRNA", "FBtr0080497_mRNA", "FBtr0079489_mRNA", "FBtr0445185_mRNA", "FBtr0080316_mRNA", "FBtr0075559_mRNA", "FBtr0100641_mRNA", "FBtr0080165_mRNA", "FBtr0081502_mRNA", "FBtr0073637_mRNA", "FBtr0080166_mRNA", "FBtr0301669_mRNA", "FBtr0086897_mRNA", "FBtr0085594_mRNA", "FBtr0329922_mRNA", "FBtr0081328_mRNA")
sc <- c("p-element", "spnE", "cuff", "dcr-2", "hen1", "krimp", "loqs", "r2d2", "vas", "zuc", "ago2-RB", "armi-RB", "aub", "del", "moon", "piwi", "tj-RB", "rhino", "rpl32", "qin-RB", "lok")

h <- read.table("/Volumes/Data/Projects/dmelR2_p-ele/rna/run2/splicing-expression/raw_expression/expr-spli.forr")
names(h) <- c("rep", "time", "tissue", "strand", "gene", "pos", "cov")

num_genes <- length(genes)

num_cols <- 5
num_rows <- ceiling(num_genes / num_cols)

plot_new <- function() {
  grid.newpage()
  pushViewport(viewport(layout = grid.layout(num_rows, num_cols)))
}

# Counter for genes
gene_counter <- 1

plot_new()  # Initial plot

for (i in 1:num_rows) {
  for (j in 1:num_cols) {
    # Check for more genes
    if (gene_counter <= num_genes) {
      target <- genes[gene_counter]
      short <- sc[gene_counter]
      
      a <- subset(h, gene == target)
      a$time <- factor(a$time, levels = c("G6", "G15", "G21", "G30", "G40"))
      
      s <- subset(a, strand == "se")
      as <- subset(a, strand == "ase")
      
      pushViewport(viewport(layout.pos.row = i, layout.pos.col = j))
      
      plot <- ggplot() +
        geom_polygon(data = s, mapping = aes(x = pos, y = cov), fill = 'lightblue', color = 'lightblue') +
        geom_polygon(data = as, aes(x = pos, y = -cov), fill = 'darksalmon', color = 'darksalmon') +
        facet_grid(time ~ rep) +
        ylab("coverage") +
        xlab("position") +
        ggtitle(paste(target, "-", short))
      
      print(plot)
      
      gene_counter <- gene_counter + 1
    }
  }
}

featurecounts

vasa, tj, armi

library(ggplot2)

# Read the output file from featureCounts
counts_file <- "/Volumes/Data/Projects/dmelR2_p-ele/rna/run2/map-star/fc/fc_R1G6.txt"
counts_data <- read.table(counts_file, header = TRUE, stringsAsFactors = FALSE)

# Select the genes of interest
selected_genes <- c("FBgn0283442", "FBgn0000964", "FBgn0041164")

# Subset the data for selected genes
selected_data <- counts_data[counts_data$Geneid %in% selected_genes, ]

# Plot expression levels
ggplot(selected_data, aes(x = Geneid, y = "../Aligned.out.sam")) +
  geom_bar(stat = "identity", fill = "steelblue") +
  xlab("Gene") +
  ylab("Expression Level") +
  ggtitle("Expression Levels of Selected Genes") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))